library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(scran)
## Loading required package: scuttle
library(scater)
## Loading required package: ggplot2
sce <- readRDS("../data/SC2018/231115_sceAfterCellTypeAnnotation.rds")
Authors recover three subclusters. Naive B cells (IGHD+, CD27-), quiescent memory B cells (CD27+, IGHG1+, IGHA1+) and plasma cells (IGHA+, IGHG+, CD38+, MS4A1-)
sceB <- sce[,sce$cellType == "B-cell"]
sceB <- logNormCounts(sceB)
dec <- modelGeneVar(sceB)
hvgB <- getTopHVGs(dec, prop=0.05)
set.seed(1234)
sceB <- runPCA(sceB,
ncomponents=10,
subset_row=hvgB)
sceB <- runUMAP(sceB,
dimred = 'PCA',
external_neighbors = TRUE,
ncomponents = 10,
min_dist=0.4)
plotUMAP(sceB)
plotUMAP(sceB, colour_by="IGHD")
plotUMAP(sceB, colour_by="CD27")
plotUMAP(sceB, colour_by="IGHG1")
plotUMAP(sceB, colour_by="IGHA1")
plotUMAP(sceB, colour_by="CD38")
plotUMAP(sceB, colour_by="MS4A1")
set.seed(464688)
# Build a shared nearest-neighbor graph from PCA space
graph <- buildSNNGraph(sceB,
use.dimred = 'PCA')
# Leiden clustering on the SNN graph
cluster_leidenB <- factor(igraph::cluster_leiden(graph = graph,
resolution_parameter = 0.05)$membership)
nlevels(cluster_leidenB)
## [1] 6
colData(sceB)$cluster_leidenB <- cluster_leidenB
plotUMAP(sceB, colour_by="cluster_leidenB")
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="IGHD") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD27") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="IGHG1") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="IGHA1") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="CD38") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
plotReducedDim(sceB,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="MS4A1") +
xlim(c(2,4)) +
ylim(c(7.5,12))
## Warning: Removed 57 rows containing missing values (`geom_point()`).
sceT <- sce[,sce$cellType == "T-cell"]
sceT <- logNormCounts(sceT)
dec <- modelGeneVar(sceT)
hvgT <- getTopHVGs(dec, prop=0.05)
set.seed(1234)
sceT <- runPCA(sceT,
ncomponents=10,
subset_row=hvgB)
sceT <- runUMAP(sceT,
dimred = 'PCA',
external_neighbors = TRUE,
ncomponents = 10,
min_dist=0.2)
plotUMAP(sceT)
plotUMAP(sceT, colour_by="GZMB")
plotUMAP(sceT, colour_by="GZMH")
plotUMAP(sceT, colour_by="CCR7")
plotUMAP(sceT, colour_by="SELL")
set.seed(464688)
# Build a shared nearest-neighbor graph from PCA space
graph <- buildSNNGraph(sceT,
use.dimred = 'PCA')
# Leiden clustering on the SNN graph
cluster_leidenT <- factor(igraph::cluster_leiden(graph = graph,
resolution_parameter = 0.001)$membership)
nlevels(cluster_leidenT)
## [1] 2
colData(sceT)$cluster_leidenT <- cluster_leidenT
plotUMAP(sceT, colour_by="cluster_leidenT")
cellState <- sce$cellType
cellState[sce$cellType == "B-cell"] <- paste0("B",cluster_leidenB)
cellState[sce$cellType == "T-cell"] <- paste0("T",cluster_leidenT)
sce$cellState <- cellState
plotReducedDim(sce,
dimred = "UMAP_fastMNNCorrected",
point_size=1/2,
colour_by="cellState") +
scale_color_manual(values=dayjob::paletteDiscrete(values=unique(sce$cellState), set="bear"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
library(dayjob)
cellCountsLong <- getCellPopulationCounts(sce,
patientVar = "sample",
cellTypeVar = "cellState",
group = "condition",
format="long")
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
cellCountsWide <- getCellPopulationCounts(sce,
patientVar = "sample",
cellTypeVar = "cellState",
group = "condition",
format="wide")
countMatrix <- cellCountsWide[,-c(1:2)]
countMatrix <- t(as.matrix(countMatrix))
cellCountsLong <- cellCountsLong %>%
group_by(patient, group) %>%
mutate(totalCells = sum(nCells),
nCellTypes = n(),
geoMean = prod(nCells+1)^(1/nCellTypes),
CLR = log((nCells+1) / geoMean )) %>%
ungroup() %>%
mutate(fractionCells = nCells / totalCells)
ggplot(cellCountsLong, aes(x=celltype, y=fractionCells, fill=group)) +
geom_boxplot(outlier.size = 0) +
geom_point(position = position_dodge(width = .75)) +
theme_classic()
ggplot(cellCountsLong, aes(x=celltype, y=CLR, fill=group)) +
geom_boxplot(outlier.size = 0) +
geom_point(position = position_dodge(width = .75)) +
theme_classic()
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
##
## plotMDS
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(voomCLR)
design <- model.matrix( ~ group, data=cellCountsWide)
v <- voomCLR(counts = countMatrix,
design = design)
fit <- lmFit(v, design)
plotBeta(fit)
fit <- applyBiasCorrection(fit)
fit <- eBayes(fit)
tt <- topTable(fit, coef=2, n=Inf)
tt
## logFC AveExpr t P.Value adj.P.Val
## T1 1.771686e+00 2.77490391 5.401473e+00 2.724764e-05 0.0003542193
## T2 -1.184069e+00 2.71762506 -4.046665e+00 6.277716e-04 0.0040805152
## B4 -1.697260e+00 -0.31301613 -2.813365e+00 1.071560e-02 0.0464342628
## B3 -1.651372e+00 -0.02736908 -2.638513e+00 1.573147e-02 0.0511272828
## Megakaryocyte -1.300596e+00 -2.53369553 -2.119965e+00 4.667379e-02 0.1213518537
## B5 -1.371930e+00 -3.33299303 -1.583402e+00 1.289713e-01 0.2395181974
## NK 8.543210e-01 2.70712063 2.016354e+00 5.735339e-02 0.1242656808
## Erythrocyte 6.835507e-01 -0.99397083 9.709046e-01 3.431598e-01 0.4461077694
## B1 -4.524239e-01 0.05428516 -1.141569e+00 2.670800e-01 0.3857821746
## B6 -2.417788e-06 -3.65020694 -2.955746e-06 9.999977e-01 0.9999976709
## CD14 Monocyte 3.353294e-01 2.55956207 1.223873e+00 2.351836e-01 0.3821733661
## B2 -1.698421e-01 -0.53385765 -3.629499e-01 7.204393e-01 0.8514282439
## CD16 Monocyte -4.613844e-02 0.57161236 -1.080049e-01 9.150650e-01 0.9913203775
## B
## T1 2.5526362
## T2 -0.5634483
## B4 -2.9246573
## B3 -3.3045385
## Megakaryocyte -3.9842333
## B5 -4.7413960
## NK -4.7888960
## Erythrocyte -5.6551917
## B1 -5.6789602
## B6 -5.8227646
## CD14 Monocyte -5.9612331
## B2 -6.1316324
## CD16 Monocyte -6.4134795
library(glmmTMB)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.4.1
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.0
## Current TMB version is 1.9.1
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
resDfNB <- data.frame(population=rownames(countMatrix),
diff=rep(NA,nrow(countMatrix)),
se=rep(NA,nrow(countMatrix)),
pval=rep(NA,nrow(countMatrix)))
for(pp in 1:nrow(countMatrix)){
curY <- countMatrix[pp,]
m_p <- glmmTMB(curY ~ -1 + design,
family=nbinom2(link="log"),
offset = log(colSums(countMatrix)))
resDfNB[pp,2:4] <- c(summary(m_p)$coef$cond["designgroupSC",c(1,2,4)])
}
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
## Warning in glmmTMB(curY ~ -1 + design, family = nbinom2(link = "log"), offset =
## log(colSums(countMatrix))): use of the 'data' argument is recommended
resDfNB$padj <- p.adjust(resDfNB$pval, "fdr")
resDfNB[order(abs(resDfNB$pval)),]
## population diff se pval padj
## 11 T1 1.5047700 0.2432410 6.157942e-10 8.005324e-09
## 12 T2 -1.2351164 0.2993443 3.690096e-05 2.398562e-04
## 4 B4 -2.1082308 0.5336825 7.803806e-05 3.381649e-04
## 5 B5 -4.0378287 1.1432488 4.126013e-04 1.340954e-03
## 3 B3 -1.8446224 0.6062794 2.345995e-03 6.099587e-03
## 9 Megakaryocyte -1.3405902 0.5577411 1.623423e-02 3.517417e-02
## 1 B1 -0.6595095 0.2991692 2.749131e-02 5.105530e-02
## 8 Erythrocyte 1.0008854 0.6247627 1.091501e-01 1.773689e-01
## 13 B6 -1.9497944 1.4289397 1.724089e-01 2.490350e-01
## 10 NK 0.4232895 0.3529722 2.304445e-01 2.995778e-01
## 7 CD16 Monocyte -0.2754860 0.3269730 3.994889e-01 4.199045e-01
## 6 CD14 Monocyte 0.1668012 0.2067743 4.198494e-01 4.199045e-01
## 2 B2 -0.3128874 0.3879153 4.199045e-01 4.199045e-01
library(edgeR)
##
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
##
## cpm
d <- DGEList(countMatrix)
d <- calcNormFactors(d)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef=2)
lrt$table$padj <- p.adjust(lrt$table$PValue, method="BH")
lrt$table[order(lrt$table$LR, decreasing=TRUE),]
## logFC logCPM LR PValue padj
## T1 3.13365558 18.47514 21.163945959 4.216219e-06 5.481085e-05
## B4 -2.67764773 14.31439 10.577903319 1.144474e-03 7.439080e-03
## B3 -2.62577570 14.76121 9.751696154 1.791573e-03 7.763484e-03
## T2 -1.32223773 17.69649 9.021980269 2.667522e-03 8.669448e-03
## B5 -4.37390459 11.65692 6.942957708 8.414986e-03 2.187896e-02
## Erythrocyte 2.05201491 13.36015 4.010259975 4.522418e-02 9.798572e-02
## CD14 Monocyte 0.71460969 17.29130 3.605029604 5.760504e-02 1.069808e-01
## NK 1.09614669 18.00146 2.335210158 1.264779e-01 2.055266e-01
## Megakaryocyte -1.06441856 10.85509 1.336585335 2.476371e-01 3.576980e-01
## B6 -1.94660919 10.48310 1.181032611 2.771459e-01 3.602896e-01
## B1 -0.43796408 13.74097 0.820729823 3.649669e-01 4.313245e-01
## CD16 Monocyte 0.47294014 14.91270 0.391993239 5.312535e-01 5.755247e-01
## B2 0.05942517 13.03295 0.009459798 9.225187e-01 9.225187e-01
library(MicrobiomeStat)
library(phyloseq)
##
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
##
## distance
## The following object is masked from 'package:Biobase':
##
## sampleNames
## The following object is masked from 'package:GenomicRanges':
##
## distance
## The following object is masked from 'package:IRanges':
##
## distance
lindaRes <- LinDA::linda(otu.tab = countMatrix, # rows features, cols samples
meta = as.data.frame(cellCountsWide),
formula = '~ group',
type = 'count',
adaptive=TRUE,
imputation = FALSE)
## Imputation approach is used.
lindaRes$output$groupSC[order(abs(lindaRes$output$groupSC$stat), decreasing=TRUE),]
## baseMean log2FoldChange lfcSE stat pvalue
## T1 88627.7112 2.57099951 0.4292389 5.98967010 0.0001339422
## T2 466082.3323 -1.67532194 0.4151763 -4.03520567 0.0023794988
## Megakaryocyte 2524.9289 -2.00848239 0.7625106 -2.63403872 0.0249883399
## B4 31049.5217 -2.59551271 1.0333052 -2.51185488 0.0308144999
## B3 40381.0392 -2.49607224 1.0816040 -2.30775053 0.0436743400
## NK 143161.3308 1.21775589 0.7168240 1.69882130 0.1201965858
## CD14 Monocyte 163411.1792 0.52587414 0.3293556 1.59667609 0.1414219048
## B1 21301.0870 -0.65483541 0.4163475 -1.57280972 0.1468383840
## B5 1138.7079 -1.81557902 1.3366131 -1.35834295 0.2042072827
## Erythrocyte 3518.6888 1.05254155 1.2157721 0.86573916 0.4069239522
## B2 10078.6410 -0.28523903 0.5414475 -0.52680831 0.6098128715
## CD16 Monocyte 28290.0293 -0.07279074 0.6410964 -0.11354103 0.9118487789
## B6 434.8029 0.05599086 1.0784249 0.05191911 0.9596156384
## padj reject df
## T1 0.001741249 TRUE 10
## T2 0.015466742 TRUE 10
## Megakaryocyte 0.100147125 FALSE 10
## B4 0.100147125 FALSE 10
## B3 0.113553284 FALSE 10
## NK 0.238612374 FALSE 10
## CD14 Monocyte 0.238612374 FALSE 10
## B1 0.238612374 FALSE 10
## B5 0.294966075 FALSE 10
## Erythrocyte 0.529001138 FALSE 10
## B2 0.720687939 FALSE 10
## CD16 Monocyte 0.959615638 FALSE 10
## B6 0.959615638 FALSE 10
allPopulations <- rownames(countMatrix)
nPopulations <- nrow(countMatrix)
allMethods <- c("voomCLR", "LinDA", "edgeR", "NBGLM")
nMethods <- length(allMethods)
resDfAll <- data.frame(population = rep(allPopulations, nMethods),
method = rep(allMethods, each=nPopulations),
log2FC = c(tt[allPopulations,]$logFC,
lindaRes$output$groupSC[allPopulations,]$log2FoldChange,
lrt$table[allPopulations,]$logFC,
log2(exp(resDfNB$diff))),
padj = c(tt[allPopulations,]$adj.P.Val,
lindaRes$output$groupSC[allPopulations,]$padj,
lrt$table[allPopulations,]$padj,
resDfNB$padj)
)
resDfAll$DA <- resDfAll$padj <= 0.05
pComp <- ggplot(resDfAll,
aes(x=population, y=method, color=log2FC, size=DA)) +
geom_point()+
facet_grid(~population, scales = "free_x", space = "free")+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.
resDfAll$DA <- resDfAll$padj <= 0.06 # to show NB models are also at adjusted p-value around .05
pComp <- ggplot(resDfAll,
aes(x=population, y=method, color=log2FC, size=DA)) +
geom_point()+
facet_grid(~population, scales = "free_x", space = "free")+
theme_light()+
scale_color_gradient2(low= "blue", mid="gray", high= "red") +
theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1),
strip.text.x = element_text(size=14, angle=90, hjust = 1, vjust = 1))
pComp
## Warning: Using size for a discrete variable is not advised.